set.seed(2005) # for reproducibility
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.0.6 ✓ dplyr 1.0.4
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggthemes)
library(corrplot)
## corrplot 0.84 loaded
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(leaflet)
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggthemes':
##
## theme_map
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:psych':
##
## outlier
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
library(rpart.plot)
## Loading required package: rpart
library(rpart)
# Load the country-level footprint data set.
# NOTE(review): the path is an absolute, machine-specific location; consider
# a project-relative path so the script runs on other machines.
data <- read.csv(
  "/Users/anisha/Downloads/countries.csv",
  encoding = "UTF-8",
  stringsAsFactors = FALSE # spelled out: `F` is an ordinary, reassignable name
)
# Structural overview: column names, types and leading values.
glimpse(data)
## Rows: 188
## Columns: 21
## $ Country <chr> "Afghanistan", "Albania", "Algeria", "…
## $ Region <chr> "Middle East/Central Asia", "Northern/…
## $ Population..millions. <dbl> 29.82, 3.16, 38.48, 20.82, 0.09, 41.09…
## $ HDI <dbl> 0.46, 0.73, 0.73, 0.52, 0.78, 0.83, 0.…
## $ GDP.per.Capita <chr> "$614.66", "$4,534.37", "$5,430.57", "…
## $ Cropland.Footprint <dbl> 0.30, 0.78, 0.60, 0.33, NA, 0.78, 0.74…
## $ Grazing.Footprint <dbl> 0.20, 0.22, 0.16, 0.15, NA, 0.79, 0.18…
## $ Forest.Footprint <dbl> 0.08, 0.25, 0.17, 0.12, NA, 0.29, 0.34…
## $ Carbon.Footprint <dbl> 0.18, 0.87, 1.14, 0.20, NA, 1.08, 0.89…
## $ Fish.Footprint <dbl> 0.00, 0.02, 0.01, 0.09, NA, 0.10, 0.01…
## $ Total.Ecological.Footprint <dbl> 0.79, 2.21, 2.12, 0.93, 5.38, 3.14, 2.…
## $ Cropland <dbl> 0.24, 0.55, 0.24, 0.20, NA, 2.64, 0.44…
## $ Grazing.Land <dbl> 0.20, 0.21, 0.27, 1.42, NA, 1.86, 0.26…
## $ Forest.Land <dbl> 0.02, 0.29, 0.03, 0.64, NA, 0.66, 0.10…
## $ Fishing.Water <dbl> 0.00, 0.07, 0.01, 0.26, NA, 1.67, 0.02…
## $ Urban.Land <dbl> 0.04, 0.06, 0.03, 0.04, NA, 0.10, 0.07…
## $ Total.Biocapacity <dbl> 0.50, 1.18, 0.59, 2.55, 0.94, 6.92, 0.…
## $ Biocapacity.Deficit.or.Reserve <dbl> -0.30, -1.03, -1.53, 1.61, -4.44, 3.78…
## $ Earths.Required <dbl> 0.46, 1.27, 1.22, 0.54, 3.11, 1.82, 1.…
## $ Countries.Required <dbl> 1.60, 1.87, 3.61, 0.37, 5.70, 0.45, 2.…
## $ Data.Quality <chr> "6", "6", "5", "6", "2", "6", "3B", "2…
# Per-column summary statistics (quartiles, means and NA counts).
data %>% summary()
## Country Region Population..millions. HDI
## Length:188 Length:188 Min. : 0.000 Min. :0.3400
## Class :character Class :character 1st Qu.: 2.038 1st Qu.:0.5575
## Mode :character Mode :character Median : 7.970 Median :0.7200
## Mean : 37.342 Mean :0.6864
## 3rd Qu.: 24.870 3rd Qu.:0.8025
## Max. :1408.040 Max. :0.9400
## NA's :16
## GDP.per.Capita Cropland.Footprint Grazing.Footprint Forest.Footprint
## Length:188 Min. :0.0700 Min. :0.0000 Min. :0.0100
## Class :character 1st Qu.:0.3500 1st Qu.:0.0800 1st Qu.:0.1700
## Mode :character Median :0.5200 Median :0.1800 Median :0.2600
## Mean :0.5782 Mean :0.2632 Mean :0.3738
## 3rd Qu.:0.7000 3rd Qu.:0.3200 3rd Qu.:0.4600
## Max. :2.6800 Max. :3.4700 Max. :3.0300
## NA's :15 NA's :15 NA's :15
## Carbon.Footprint Fish.Footprint Total.Ecological.Footprint Cropland
## Min. : 0.000 Min. :0.0000 Min. : 0.420 Min. :0.0000
## 1st Qu.: 0.420 1st Qu.:0.0200 1st Qu.: 1.482 1st Qu.:0.1800
## Median : 1.140 Median :0.0700 Median : 2.740 Median :0.3500
## Mean : 1.805 Mean :0.1225 Mean : 3.318 Mean :0.5319
## 3rd Qu.: 2.600 3rd Qu.:0.1500 3rd Qu.: 4.640 3rd Qu.:0.5900
## Max. :12.650 Max. :0.8200 Max. :15.820 Max. :5.4200
## NA's :15 NA's :15 NA's :15
## Grazing.Land Forest.Land Fishing.Water Urban.Land
## Min. :0.0000 Min. : 0.000 Min. : 0.0000 Min. :0.00000
## 1st Qu.:0.0300 1st Qu.: 0.060 1st Qu.: 0.0300 1st Qu.:0.03000
## Median :0.1200 Median : 0.340 Median : 0.1100 Median :0.05000
## Mean :0.4566 Mean : 2.459 Mean : 0.5951 Mean :0.06711
## 3rd Qu.:0.3400 3rd Qu.: 1.170 3rd Qu.: 0.3700 3rd Qu.:0.09000
## Max. :8.2300 Max. :95.160 Max. :16.0700 Max. :0.27000
## NA's :15 NA's :15 NA's :15 NA's :15
## Total.Biocapacity Biocapacity.Deficit.or.Reserve Earths.Required
## Min. : 0.050 Min. :-14.1400 Min. :0.240
## 1st Qu.: 0.675 1st Qu.: -1.9350 1st Qu.:0.855
## Median : 1.310 Median : -0.7300 Median :1.580
## Mean : 4.020 Mean : 0.7021 Mean :1.916
## 3rd Qu.: 2.815 3rd Qu.: 0.2125 3rd Qu.:2.678
## Max. :111.350 Max. :109.0100 Max. :9.140
##
## Countries.Required Data.Quality
## Min. : 0.0200 Length:188
## 1st Qu.: 0.9425 Class :character
## Median : 1.7050 Mode :character
## Mean : 4.0374
## 3rd Qu.: 2.8475
## Max. :159.4700
##
# GDP arrives as text such as "$4,534.37": strip the currency symbol and the
# thousands separators before converting to numeric.
data$GDP.per.Capita <- as.numeric(gsub("[$,]", "", data$GDP.per.Capita))
# Conversion to Factors
data$Country <- as.factor(data$Country)
data$Region <- as.factor(data$Region)
# Drop the Data.Quality column.
data$Data.Quality <- NULL
# Analysing Countries with Missing Data
# Rows are selected on a missing HDI only. Columns are picked by name rather
# than by position so the table does not depend on column order.
table1_data <- data[is.na(data$HDI), c("Country", "Region")]
rownames(table1_data) <- NULL
table1_data %>%
  kable(caption = "Countries with Missing Data") %>%
  kable_styling("striped")
## Countries with Missing Data
##
## | Country                                | Region        |
## |:---------------------------------------|:--------------|
## | Aruba                                  | Latin America |
## | Bermuda                                | North America |
## | British Virgin Islands                 | Latin America |
## | Cayman Islands                         | Latin America |
## | Côte d’Ivoire                          | Africa        |
## | French Guiana                          | Latin America |
## | French Polynesia                       | Asia-Pacific  |
## | Guadeloupe                             | Latin America |
## | Korea, Democratic People’s Republic of | Asia-Pacific  |
## | Martinique                             | Latin America |
## | Montserrat                             | Latin America |
## | Nauru                                  | Asia-Pacific  |
## | New Caledonia                          | Asia-Pacific  |
## | Réunion                                | Africa        |
## | Somalia                                | Africa        |
## | Wallis and Futuna Islands              | Asia-Pacific  |
# Visualizing Numeric Variables for Skewness
# vapply() always yields a logical vector, so the column mask is type-stable.
multi.hist(data[, vapply(data, is.numeric, logical(1))])

# Density-scaled histogram of Total.Ecological.Footprint with a kernel
# density overlay; the dotted vertical line marks the mean.
data %>%
  ggplot(aes(x = Total.Ecological.Footprint)) +
  # after_stat(density) replaces the deprecated `..density..` notation.
  geom_histogram(aes(y = after_stat(density)), bins = 30, fill = "indianred1") +
  geom_density(alpha = 0.2, fill = "indianred1") +
  theme_fivethirtyeight() +
  ggtitle("Distribution of Total Ecological Footprint") +
  # Explicitly set axis-title elements on top of the fivethirtyeight theme.
  theme(axis.title = element_text(), axis.title.x = element_text()) +
  # na.rm guards the reference line against missing values in the column.
  geom_vline(
    xintercept = mean(data$Total.Ecological.Footprint, na.rm = TRUE),
    size = 2, linetype = 3
  ) +
  annotate("text", x = 7, y = 0.35, label = "Average Footprint per Capita")

# Density-scaled histogram of Total.Biocapacity with a kernel density
# overlay; the dotted vertical line marks the mean.
data %>%
  ggplot(aes(x = Total.Biocapacity)) +
  # after_stat(density) replaces the deprecated `..density..` notation.
  geom_histogram(aes(y = after_stat(density)), bins = 30, fill = "lightgreen") +
  # The colour name previously carried a stray leading space.
  geom_density(alpha = 0.2, fill = "lightgreen") +
  theme_fivethirtyeight() +
  ggtitle("Distribution of Total Biocapacity") +
  # Explicitly set axis-title elements on top of the fivethirtyeight theme.
  theme(axis.title = element_text(), axis.title.x = element_text()) +
  # na.rm guards the reference line against missing values in the column.
  geom_vline(
    xintercept = mean(data$Total.Biocapacity, na.rm = TRUE),
    size = 2, linetype = 3
  ) +
  annotate("text", x = 7, y = 0.35, label = "Avg Biocapacity per Person")

# Density-scaled histogram of Biocapacity.Deficit.or.Reserve with a kernel
# density overlay; the dotted vertical line marks the mean.
data %>%
  ggplot(aes(x = Biocapacity.Deficit.or.Reserve)) +
  # after_stat(density) replaces the deprecated `..density..` notation.
  geom_histogram(aes(y = after_stat(density)), bins = 30, fill = "lightyellow") +
  geom_density(alpha = 0.2, fill = "lightyellow") +
  theme_fivethirtyeight() +
  ggtitle("Distribution of Ecological Deficit/Reserve") +
  # Explicitly set axis-title elements on top of the fivethirtyeight theme.
  theme(axis.title = element_text(), axis.title.x = element_text()) +
  # na.rm guards the reference line against missing values in the column.
  geom_vline(
    xintercept = mean(data$Biocapacity.Deficit.or.Reserve, na.rm = TRUE),
    size = 2, linetype = 3
  ) +
  annotate("text", x = 7, y = 0.35, label = "Avg Deficit/Reserve per Person")

# Horizontal bar chart of the 20 countries with the largest
# Total.Ecological.Footprint; each value is printed in parentheses at y = 1.
data %>%
  group_by(Country) %>%
  summarise(
    EcoFootprintMedian = median(Total.Ecological.Footprint, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  # Order the factor levels by value so the bars come out sorted.
  mutate(Country = reorder(Country, EcoFootprintMedian)) %>%
  arrange(desc(EcoFootprintMedian)) %>%
  head(20) %>%
  ggplot(aes(x = Country, y = EcoFootprintMedian)) +
  geom_col(fill = "indianred1") +
  # paste0() has no `sep` argument, so none is passed.
  geom_text(
    aes(x = Country, y = 1, label = paste0("(", EcoFootprintMedian, ")")),
    hjust = 0, vjust = 0.5, size = 4, colour = "black", fontface = "italic"
  ) +
  labs(
    x = "Countries",
    y = "Ecological Footprint",
    title = "Countries With Highest Footprint"
  ) +
  coord_flip() +
  theme(legend.position = "none")

# Horizontal bar chart of the 20 countries with the smallest
# Total.Ecological.Footprint; each value is printed in parentheses at y = 1.
data %>%
  group_by(Country) %>%
  summarise(
    EcoFootprintMedian = median(Total.Ecological.Footprint, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  # Order the factor levels by value so the bars come out sorted.
  mutate(Country = reorder(Country, EcoFootprintMedian)) %>%
  arrange(desc(EcoFootprintMedian)) %>%
  # After the descending sort, the last 20 rows are the lowest values.
  tail(20) %>%
  ggplot(aes(x = Country, y = EcoFootprintMedian)) +
  geom_col(fill = "indianred1") +
  # paste0() has no `sep` argument, so none is passed.
  geom_text(
    aes(x = Country, y = 1, label = paste0("(", EcoFootprintMedian, ")")),
    hjust = 0, vjust = 0.5, size = 2, colour = "black", fontface = "italic"
  ) +
  labs(
    x = "Countries",
    y = "Ecological Footprint",
    title = "Countries With Lowest Footprint"
  ) +
  coord_flip() +
  theme(legend.position = "none")

# Horizontal bar chart of the 20 countries with the largest
# Total.Biocapacity; each value is printed in parentheses at y = 1.
data %>%
  group_by(Country) %>%
  summarise(
    EcoBiocapcityMedian = median(Total.Biocapacity, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  # Order the factor levels by value so the bars come out sorted.
  mutate(Country = reorder(Country, EcoBiocapcityMedian)) %>%
  arrange(desc(EcoBiocapcityMedian)) %>%
  head(20) %>%
  ggplot(aes(x = Country, y = EcoBiocapcityMedian)) +
  geom_col(fill = "lightgreen") +
  # paste0() has no `sep` argument, so none is passed.
  geom_text(
    aes(x = Country, y = 1, label = paste0("(", EcoBiocapcityMedian, ")")),
    hjust = 0, vjust = 0.5, size = 4, colour = "black", fontface = "italic"
  ) +
  labs(
    x = "Countries",
    y = "Biocapacity",
    title = "Countries With Highest Biocapacities"
  ) +
  coord_flip() +
  theme(legend.position = "none")

# Horizontal bar chart of the 10 countries with the largest
# Biocapacity.Deficit.or.Reserve; each value is printed in parentheses
# at y = 1.
data %>%
  group_by(Country) %>%
  summarise(
    EcoDeficitReserveMedian =
      median(Biocapacity.Deficit.or.Reserve, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  # Order the factor levels by value so the bars come out sorted.
  mutate(Country = reorder(Country, EcoDeficitReserveMedian)) %>%
  arrange(desc(EcoDeficitReserveMedian)) %>%
  head(10) %>%
  ggplot(aes(x = Country, y = EcoDeficitReserveMedian)) +
  geom_col(fill = "green") +
  # paste0() has no `sep` argument, so none is passed.
  geom_text(
    aes(x = Country, y = 1, label = paste0("(", EcoDeficitReserveMedian, ")")),
    hjust = 0, vjust = 0.5, size = 4, colour = "black", fontface = "italic"
  ) +
  labs(
    x = "Countries",
    y = "Ecological Reserves",
    title = "Countries With Largest Ecological Reserves"
  ) +
  coord_flip() +
  theme(legend.position = "none")

# Horizontal bar chart of the 10 countries with the smallest
# Biocapacity.Deficit.or.Reserve (the largest deficits); each value is
# printed in parentheses at y = 1.
data %>%
  group_by(Country) %>%
  summarise(
    EcoDeficitReserveMedian =
      median(Biocapacity.Deficit.or.Reserve, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  # Order the factor levels by value so the bars come out sorted.
  mutate(Country = reorder(Country, EcoDeficitReserveMedian)) %>%
  arrange(desc(EcoDeficitReserveMedian)) %>%
  # After the descending sort, the last 10 rows are the lowest values.
  tail(10) %>%
  ggplot(aes(x = Country, y = EcoDeficitReserveMedian)) +
  geom_col(fill = "indianred1") +
  # paste0() has no `sep` argument, so none is passed.
  geom_text(
    aes(x = Country, y = 1, label = paste0("(", EcoDeficitReserveMedian, ")")),
    hjust = 0, vjust = 0.4, size = 1.5, colour = "black", fontface = "italic"
  ) +
  labs(
    x = "Countries",
    y = "Ecological Deficits",
    title = "Countries With Largest Ecological Deficits"
  ) +
  coord_flip() +
  theme(legend.position = "none")

# Horizontal bar chart of the 10 countries with the largest
# Population..millions.; each value is printed in parentheses at y = 1.
data %>%
  group_by(Country) %>%
  summarise(PopMedian = median(Population..millions., na.rm = TRUE)) %>%
  ungroup() %>%
  # Order the factor levels by value so the bars come out sorted.
  mutate(Country = reorder(Country, PopMedian)) %>%
  arrange(desc(PopMedian)) %>%
  head(10) %>%
  ggplot(aes(x = Country, y = PopMedian)) +
  geom_col(fill = "yellow") +
  # paste0() has no `sep` argument, so none is passed.
  geom_text(
    aes(x = Country, y = 1, label = paste0("(", PopMedian, ")")),
    hjust = 0, vjust = 0.4, size = 1.5, colour = "black", fontface = "italic"
  ) +
  labs(
    x = "Countries",
    y = "Population (in millions)",
    title = "Countries with Highest Population"
  ) +
  coord_flip() +
  theme(legend.position = "none")

# Horizontal bar chart of the 10 countries with the largest GDP.per.Capita;
# each value is printed in parentheses at y = 1.
data %>%
  group_by(Country) %>%
  summarise(GDPMedian = median(GDP.per.Capita, na.rm = TRUE)) %>%
  ungroup() %>%
  # Order the factor levels by value so the bars come out sorted.
  mutate(Country = reorder(Country, GDPMedian)) %>%
  arrange(desc(GDPMedian)) %>%
  head(10) %>%
  ggplot(aes(x = Country, y = GDPMedian)) +
  geom_col(fill = "olivedrab") +
  # paste0() has no `sep` argument, so none is passed.
  geom_text(
    aes(x = Country, y = 1, label = paste0("(", GDPMedian, ")")),
    hjust = 0, vjust = 0.4, size = 1.5, colour = "black", fontface = "italic"
  ) +
  labs(
    x = "Countries",
    y = "GDP per Capita",
    title = "Countries with Highest GDP per Capita"
  ) +
  coord_flip() +
  theme(legend.position = "none")

# Horizontal bar chart of the 10 countries with the largest HDI; each value
# is printed in parentheses at y = 1.
data %>%
  group_by(Country) %>%
  summarise(HDIMedian = median(HDI, na.rm = TRUE)) %>%
  ungroup() %>%
  # Order the factor levels by value so the bars come out sorted.
  mutate(Country = reorder(Country, HDIMedian)) %>%
  arrange(desc(HDIMedian)) %>%
  head(10) %>%
  ggplot(aes(x = Country, y = HDIMedian)) +
  geom_col(fill = "violetred") +
  # paste0() has no `sep` argument, so none is passed.
  geom_text(
    aes(x = Country, y = 1, label = paste0("(", HDIMedian, ")")),
    hjust = 0, vjust = 0.4, size = 1.5, colour = "black", fontface = "italic"
  ) +
  labs(
    x = "Countries",
    y = "HDI",
    title = "Countries with Highest HDI"
  ) +
  coord_flip() +
  theme(legend.position = "none")

# Correlation Plots
# Both matrices are computed on the same rows: numeric columns only, with
# every row that contains a missing value removed.
k <- data[, vapply(data, is.numeric, logical(1))]
k <- k[complete.cases(k), ]
korelacija <- cor(k)
corrplot(
  korelacija,
  method = "color", tl.cex = 0.825,
  title = "Pearson's Correlation", mar = c(0, 0, 1, 0)
)

# k2 keeps its own name but is the same subset as k, so it is reused rather
# than rebuilt from `data`.
k2 <- k
korelacija2 <- cor(k2, method = "spearman")
corrplot(
  korelacija2,
  method = "color", tl.cex = 0.825,
  title = "Spearman's Correlation", mar = c(0, 0, 1, 0)
)

# Regional Breakdowns:
# Number of Countries in each Region
# One horizontal bar per region, sorted by count, with the count printed in
# bold at y = 1.
data %>%
  count(Region) %>%
  ggplot(aes(x = reorder(Region, n), y = n)) +
  geom_col(fill = "lightsalmon") +
  theme_fivethirtyeight() +
  ggtitle("Number of Countries in each Region") +
  geom_text(
    aes(x = Region, y = 1, label = as.character(n)),
    hjust = 0.15, vjust = 0.5, size = 4,
    colour = "black", fontface = "bold"
  ) +
  coord_flip()

# Total Footprint of each Region
# geom_col() draws the pre-summed totals directly. geom_histogram() with
# stat = "identity" drew the same bars but warned that its binning
# parameters were ignored.
data %>%
  group_by(Region) %>%
  summarise(Total = sum(Total.Ecological.Footprint)) %>%
  ggplot(aes(Region, Total, fill = Region)) +
  geom_col() +
  theme_fivethirtyeight() +
  ggtitle("Total Ecological Footprint by Region") +
  theme(axis.text.x = element_text(angle = 20, vjust = 0.65))

# Biocapacities in each Region
# geom_col() draws the pre-summed totals directly. geom_histogram() with
# stat = "identity" drew the same bars but warned that its binning
# parameters were ignored.
data %>%
  group_by(Region) %>%
  summarise(Total = sum(Total.Biocapacity)) %>%
  ggplot(aes(Region, Total, fill = Region)) +
  geom_col() +
  theme_fivethirtyeight() +
  ggtitle("Total Biocapacity by Region") +
  theme(axis.text.x = element_text(angle = 20, vjust = 0.65))

# Ecological Deficit/Reserve in each Region
# geom_col() draws the pre-summed totals directly. geom_histogram() with
# stat = "identity" drew the same bars but warned that its binning
# parameters were ignored.
data %>%
  group_by(Region) %>%
  summarise(Total = sum(Biocapacity.Deficit.or.Reserve)) %>%
  ggplot(aes(Region, Total, fill = Region)) +
  geom_col() +
  theme_fivethirtyeight() +
  ggtitle("Total Deficit/Reserve by Region") +
  theme(axis.text.x = element_text(angle = 20, vjust = 0.65))

# Plotting Footprint over HDI by Region and Population
# Rows are aggregated per (HDI, Region) pair, so countries sharing both
# values collapse into a single point carrying their medians.
# `.groups = "drop"` returns an ungrouped table and silences the
# summarise() regrouping message.
by_region <- data %>%
  group_by(HDI, Region) %>%
  summarise(
    FootprintMedian = median(Total.Ecological.Footprint),
    PopulationMedian = median(Population..millions.),
    .groups = "drop"
  )
# Only one ylab() is kept: the earlier "Ecological Footprint" label was
# always overridden by the later "Footprint" one.
ggplot(
  by_region,
  aes(x = HDI, y = FootprintMedian, color = Region, size = PopulationMedian)
) +
  geom_point(alpha = 0.7) +
  labs(title = "Countries' Footprints over HDI by Region and Population") +
  scale_color_manual(values = c(
    "mediumseagreen", "orange", "mediumpurple", "mediumvioletred",
    "lightskyblue", "khaki1", "khaki4"
  )) +
  xlab("HDI") +
  ylab("Footprint")
## Warning: Removed 4 rows containing missing values (geom_point).

# Plotting Footprint over GDP by Region and Population
# Rows are aggregated per (GDP.per.Capita, Region) pair, so countries
# sharing both values collapse into a single point carrying their medians.
# `.groups = "drop"` returns an ungrouped table and silences the
# summarise() regrouping message.
by_region <- data %>%
  group_by(GDP.per.Capita, Region) %>%
  summarise(
    FootprintMedian = median(Total.Ecological.Footprint),
    PopulationMedian = median(Population..millions.),
    .groups = "drop"
  )
# Only one ylab() is kept: the earlier "Ecological Footprint" label was
# always overridden by the later "Footprint" one.
ggplot(
  by_region,
  aes(
    x = GDP.per.Capita, y = FootprintMedian,
    color = Region, size = PopulationMedian
  )
) +
  geom_point(alpha = 0.7) +
  labs(
    title = "Countries' Footprints over GDP per Capita by Region and Population"
  ) +
  scale_color_manual(values = c(
    "mediumseagreen", "orange", "mediumpurple", "mediumvioletred",
    "lightskyblue", "khaki1", "khaki4"
  )) +
  xlab("GDP") +
  ylab("Footprint")
## Warning: Removed 4 rows containing missing values (geom_point).

# Plotting Deficit/Reserve over HDI by Region and Population
# Rows are aggregated per (HDI, Region) pair, so countries sharing both
# values collapse into a single point carrying their medians.
# `.groups = "drop"` returns an ungrouped table and silences the
# summarise() regrouping message.
by_region <- data %>%
  group_by(HDI, Region) %>%
  summarise(
    DeficitMedian = median(Biocapacity.Deficit.or.Reserve),
    PopulationMedian = median(Population..millions.),
    .groups = "drop"
  )
# Only one ylab() is kept: the earlier "Ecological Deficit/Reserve" label
# was always overridden by the later "Deficit/Reserve" one.
ggplot(
  by_region,
  aes(x = HDI, y = DeficitMedian, color = Region, size = PopulationMedian)
) +
  geom_point(alpha = 0.7) +
  labs(
    title = "Countries' Deficit/Reserve over HDI by Region and Population"
  ) +
  scale_color_manual(values = c(
    "mediumseagreen", "orange", "mediumpurple", "mediumvioletred",
    "lightskyblue", "khaki1", "khaki4"
  )) +
  xlab("HDI") +
  ylab("Deficit/Reserve")
## Warning: Removed 4 rows containing missing values (geom_point).

# Plotting Deficit/Reserve over GDP by Region and Population
# Rows are aggregated per (GDP.per.Capita, Region) pair, so countries
# sharing both values collapse into a single point carrying their medians.
# `.groups = "drop"` returns an ungrouped table and silences the
# summarise() regrouping message.
by_region <- data %>%
  group_by(GDP.per.Capita, Region) %>%
  summarise(
    DeficitMedian = median(Biocapacity.Deficit.or.Reserve),
    PopulationMedian = median(Population..millions.),
    .groups = "drop"
  )
# Only one ylab() is kept: the earlier "Ecological Deficit/Reserve" label
# was always overridden by the later "Deficit/Reserve" one.
ggplot(
  by_region,
  aes(
    x = GDP.per.Capita, y = DeficitMedian,
    color = Region, size = PopulationMedian
  )
) +
  geom_point(alpha = 0.7) +
  labs(
    title = "Countries' Deficit/Reserve over GDP per Capita by Region and Population"
  ) +
  scale_color_manual(values = c(
    "mediumseagreen", "orange", "mediumpurple", "mediumvioletred",
    "lightskyblue", "khaki1", "khaki4"
  )) +
  xlab("GDP") +
  ylab("Deficit/Reserve")
## Warning: Removed 4 rows containing missing values (geom_point).

# Plotting GDP over HDI by Region and Population
# Rows are aggregated per (HDI, Region) pair, so countries sharing both
# values collapse into a single point carrying their medians.
# `.groups = "drop"` returns an ungrouped table and silences the
# summarise() regrouping message.
by_region <- data %>%
  group_by(HDI, Region) %>%
  summarise(
    GDPMedian = median(GDP.per.Capita),
    PopulationMedian = median(Population..millions.),
    .groups = "drop"
  )
# The title now matches the axes (GDP on y, HDI on x) and fixes the
# "Developent" typo. The duplicated ylab("GDP Per Capita") is given once.
ggplot(
  by_region,
  aes(x = HDI, y = GDPMedian, color = Region, size = PopulationMedian)
) +
  geom_point(alpha = 0.7) +
  labs(
    title = "Countries' GDP Per Capita over Human Development Index by Region and Population"
  ) +
  scale_color_manual(values = c(
    "mediumseagreen", "orange", "mediumpurple", "mediumvioletred",
    "lightskyblue", "khaki1", "khaki4"
  )) +
  xlab("HDI") +
  ylab("GDP Per Capita")
## Warning: Removed 5 rows containing missing values (geom_point).

# Linear Regression Model
# Straight-line fit of footprint on HDI alone (one predictor).
LinearModelHDI <- lm(
  formula = Total.Ecological.Footprint ~ HDI,
  data = data
)
summary(LinearModelHDI)
##
## Call:
## lm(formula = Total.Ecological.Footprint ~ HDI, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6459 -0.9784 -0.3301 0.6633 10.3107
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.3021 0.5423 -7.933 2.74e-13 ***
## HDI 11.0241 0.7706 14.306 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.572 on 170 degrees of freedom
## (16 observations deleted due to missingness)
## Multiple R-squared: 0.5462, Adjusted R-squared: 0.5436
## F-statistic: 204.7 on 1 and 170 DF, p-value: < 2.2e-16
# Footprint regressed on exp(HDI): still linear in its coefficients, with
# the predictor transformed exponentially.
LinearModelHDI2 <- lm(
  formula = Total.Ecological.Footprint ~ I(exp(HDI)),
  data = data
)
summary(LinearModelHDI2)
##
## Call:
## lm(formula = Total.Ecological.Footprint ~ I(exp(HDI)), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5625 -0.9288 -0.4152 0.5897 10.0971
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.3649 0.7785 -10.74 <2e-16 ***
## I(exp(HDI)) 5.7853 0.3829 15.11 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.525 on 170 degrees of freedom
## (16 observations deleted due to missingness)
## Multiple R-squared: 0.5731, Adjusted R-squared: 0.5706
## F-statistic: 228.3 on 1 and 170 DF, p-value: < 2.2e-16
# Quadratic-in-HDI model: footprint regressed on HDI and its square.
LinearModelHDI3 <- lm(
  formula = Total.Ecological.Footprint ~ HDI + I(HDI^2),
  data = data
)
summary(LinearModelHDI3)
##
## Call:
## lm(formula = Total.Ecological.Footprint ~ HDI + I(HDI^2), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5236 -0.8254 -0.2381 0.3253 9.5480
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.502 1.985 3.275 0.001280 **
## HDI -23.827 6.238 -3.820 0.000188 ***
## I(HDI^2) 26.481 4.709 5.623 7.61e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.447 on 169 degrees of freedom
## (16 observations deleted due to missingness)
## Multiple R-squared: 0.6178, Adjusted R-squared: 0.6132
## F-statistic: 136.6 on 2 and 169 DF, p-value: < 2.2e-16
# Scatter of footprint against HDI with a straight-line fit (red, solid) and
# a quadratic fit (blue, dashed), rendered as an interactive plotly widget.
linearRegression <- ggplot(data, aes(HDI, Total.Ecological.Footprint)) +
  # `text` is not a ggplot2 aesthetic (hence the warning below); it is
  # passed through so ggplotly() can show the country in the tooltip.
  geom_point(aes(text = Country)) +
  # The formula is stated explicitly so geom_smooth() does not print its
  # "using formula" message.
  geom_smooth(
    method = "lm", formula = y ~ x,
    color = "red", linetype = 1, se = FALSE
  ) +
  geom_smooth(
    method = "lm", formula = y ~ x + I(x^2),
    color = "blue", linetype = 2, se = FALSE
  ) +
  ggtitle("Simple Linear Regression Model (HDI)")
## Warning: Ignoring unknown aesthetics: text
ggplotly(linearRegression, tooltip = "text")
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
# Quadratic-in-GDP model: footprint regressed on GDP per capita and its
# square.
LinearModelGDP <- lm(
  formula = Total.Ecological.Footprint ~ GDP.per.Capita + I(GDP.per.Capita^2),
  data = data
)
summary(LinearModelGDP)
##
## Call:
## lm(formula = Total.Ecological.Footprint ~ GDP.per.Capita + I(GDP.per.Capita^2),
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.3315 -0.7689 -0.2931 0.6164 6.3890
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.738e+00 1.432e-01 12.139 < 2e-16 ***
## GDP.per.Capita 1.344e-04 1.284e-05 10.475 < 2e-16 ***
## I(GDP.per.Capita^2) -5.874e-10 1.532e-10 -3.835 0.000177 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.351 on 170 degrees of freedom
## (15 observations deleted due to missingness)
## Multiple R-squared: 0.6678, Adjusted R-squared: 0.6639
## F-statistic: 170.9 on 2 and 170 DF, p-value: < 2.2e-16
# Scatter of footprint against GDP per capita with a straight-line fit (red,
# solid) and a quadratic fit (blue, dashed), rendered as an interactive
# plotly widget.
linearRegression2 <- ggplot(
  data, aes(GDP.per.Capita, Total.Ecological.Footprint)
) +
  # `text` is not a ggplot2 aesthetic (hence the warning below); it is
  # passed through so ggplotly() can show the country in the tooltip.
  geom_point(aes(text = Country)) +
  # The formula is stated explicitly so geom_smooth() does not print its
  # "using formula" message.
  geom_smooth(
    method = "lm", formula = y ~ x,
    color = "red", linetype = 1, se = FALSE
  ) +
  geom_smooth(
    method = "lm", formula = y ~ x + I(x^2),
    color = "blue", linetype = 2, se = FALSE
  ) +
  ggtitle("Simple Linear Regression Model (GDP per Capita)")
## Warning: Ignoring unknown aesthetics: text
ggplotly(linearRegression2, tooltip = "text")
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
# Multiple regression: footprint on GDP per capita and HDI, each entered
# with a linear and a squared term.
MultipleModel <- lm(
  formula = Total.Ecological.Footprint ~
    GDP.per.Capita + HDI + I(GDP.per.Capita^2) + I(HDI^2),
  data = data
)
summary(MultipleModel)
##
## Call:
## lm(formula = Total.Ecological.Footprint ~ GDP.per.Capita + HDI +
## I(GDP.per.Capita^2) + I(HDI^2), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8412 -0.7107 -0.1608 0.5324 5.3628
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.853e+00 2.263e+00 -1.261 0.20920
## GDP.per.Capita 8.304e-05 2.503e-05 3.318 0.00111 **
## HDI 1.100e+01 7.758e+00 1.419 0.15791
## I(GDP.per.Capita^2) -1.535e-10 2.077e-10 -0.739 0.46083
## I(HDI^2) -5.035e+00 6.603e+00 -0.762 0.44690
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.259 on 166 degrees of freedom
## (17 observations deleted due to missingness)
## Multiple R-squared: 0.715, Adjusted R-squared: 0.7081
## F-statistic: 104.1 on 4 and 166 DF, p-value: < 2.2e-16
# Note: Population was left out because its Pearson/Spearman correlations
# with Footprint were very low.
# Deficit/reserve regressed on footprint and biocapacity, each with a linear
# and a squared term.
# NOTE(review): the printed fit has slopes of about -1 and +1 and an
# R-squared of 1, which suggests the response is simply biocapacity minus
# footprint - confirm this model is intended.
MultipleModel2 <- lm(
  formula = Biocapacity.Deficit.or.Reserve ~
    Total.Ecological.Footprint + Total.Biocapacity +
    I(Total.Ecological.Footprint^2) + I(Total.Biocapacity^2),
  data = data
)
summary(MultipleModel2)
##
## Call:
## lm(formula = Biocapacity.Deficit.or.Reserve ~ Total.Ecological.Footprint +
## Total.Biocapacity + I(Total.Ecological.Footprint^2) + I(Total.Biocapacity^2),
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0101987 -0.0001518 -0.0000773 0.0001638 0.0105052
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.374e-04 9.140e-04 0.260 0.795
## Total.Ecological.Footprint -1.000e+00 4.190e-04 -2386.979 <2e-16 ***
## Total.Biocapacity 9.999e-01 1.084e-04 9222.256 <2e-16 ***
## I(Total.Ecological.Footprint^2) -1.369e-06 3.644e-05 -0.038 0.970
## I(Total.Biocapacity^2) 4.949e-07 1.128e-06 0.439 0.661
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.005117 on 183 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 2.474e+08 on 4 and 183 DF, p-value: < 2.2e-16
# Scatter of footprint against Cropland with a straight-line fit (red,
# solid) and a quadratic fit (blue, dashed), rendered as an interactive
# plotly widget.
linearRegression3 <- ggplot(data, aes(Cropland, Total.Ecological.Footprint)) +
  # `text` is not a ggplot2 aesthetic (hence the warning below); it is
  # passed through so ggplotly() can show the country in the tooltip.
  geom_point(aes(text = Country)) +
  # The formula is stated explicitly so geom_smooth() does not print its
  # "using formula" message.
  geom_smooth(
    method = "lm", formula = y ~ x,
    color = "red", linetype = 1, se = FALSE
  ) +
  geom_smooth(
    method = "lm", formula = y ~ x + I(x^2),
    color = "blue", linetype = 2, se = FALSE
  ) +
  ggtitle("Simple Linear Regression Model (Cropland)")
## Warning: Ignoring unknown aesthetics: text
ggplotly(linearRegression3, tooltip = "text")
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
# Scatter of footprint against Forest.Land with a straight-line fit (red,
# solid) and a quadratic fit (blue, dashed), rendered as an interactive
# plotly widget.
linearRegression4 <- ggplot(
  data, aes(Forest.Land, Total.Ecological.Footprint)
) +
  # `text` is not a ggplot2 aesthetic (hence the warning below); it is
  # passed through so ggplotly() can show the country in the tooltip.
  geom_point(aes(text = Country)) +
  # The formula is stated explicitly so geom_smooth() does not print its
  # "using formula" message.
  geom_smooth(
    method = "lm", formula = y ~ x,
    color = "red", linetype = 1, se = FALSE
  ) +
  geom_smooth(
    method = "lm", formula = y ~ x + I(x^2),
    color = "blue", linetype = 2, se = FALSE
  ) +
  ggtitle("Simple Linear Regression Model (Forest Land)")
## Warning: Ignoring unknown aesthetics: text
ggplotly(linearRegression4, tooltip = "text")
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
# Scatter of footprint against Fishing.Water with a straight-line fit (red,
# solid) and a quadratic fit (blue, dashed), rendered as an interactive
# plotly widget.
linearRegression5 <- ggplot(
  data, aes(Fishing.Water, Total.Ecological.Footprint)
) +
  # `text` is not a ggplot2 aesthetic (hence the warning below); it is
  # passed through so ggplotly() can show the country in the tooltip.
  geom_point(aes(text = Country)) +
  # The formula is stated explicitly so geom_smooth() does not print its
  # "using formula" message.
  geom_smooth(
    method = "lm", formula = y ~ x,
    color = "red", linetype = 1, se = FALSE
  ) +
  geom_smooth(
    method = "lm", formula = y ~ x + I(x^2),
    color = "blue", linetype = 2, se = FALSE
  ) +
  # The title previously read "Fishing Land" although the plotted column is
  # Fishing.Water.
  ggtitle("Simple Linear Regression Model (Fishing Water)")
## Warning: Ignoring unknown aesthetics: text
ggplotly(linearRegression5, tooltip = "text")
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing non-finite values (stat_smooth).